Giotto results are placed in the directory “/data/Giotto_AD_codes/analysis/”, data is read from the directory “/data/Giotto_AD_codes/analysis/” Input raw expression data was downloaded from GEO data set GSE152506. Four spatial transcriptomics samples were extracted and bundled together (Two ALzheimer’s model samples: N04_D1 and N05_C2, and two WT samples: B04_D1 and B05_D2). Their spatial coordinates were shifted so that they can be analyzed side by side. Only protein coding genes were included. Gene names were put in upper case because of the format of cell type enrichment reference data. The compiled data for this tutorial can be also found at: https://www.synapse.org/#!Synapse:syn26484805. Cells / spots / spatial spots are sometimes used interchangeably
rm(list = ls())
library(data.table)
library(Giotto)
packageVersion("Giotto")
## [1] '1.0.4'
## Giotto output is saved in the results_folder
results_folder = "/data/Giotto_AD_codes/analysis/"
dir.create(results_folder)
## Warning in dir.create(results_folder): '/data/Giotto_AD_codes/analysis' already
## exists
## Directory where data is stored
data_path="/data/Giotto_AD_codes/ST_data/"
## Specified Python path is optional, otehrwise the default Python version will be used.
python_path = NULL
if (F) python_path = "/usr/local/bin/python3"
## Giotto instructions
instrs = createGiottoInstructions(save_dir = results_folder,
save_plot = TRUE,
show_plot = TRUE,
python_path = python_path)
##
## no external python path or giotto environment was specified, will check if a default python path is available
##
## A default python path was found: /usr/bin/python3 and will be used
##
## If this is not the correct python path, either
##
## 1. use installGiottoEnvironment() to install a local miniconda python environment along with required modules
##
## 2. provide an existing python path to python_path to use your own python path which has all modules installed
## Read in the expression and spatial position data and create a Giotto object
gg1 <- createGiottoObject(raw_exprs = paste0(data_path,"raw_expression_countsUC.txt"),
spatial_locs = paste0(data_path,"spot_locations.txt"),
instructions = instrs)
## Consider to install these (optional) packages to run all possible Giotto commands for spatial analyses: RTriangle FactoMiner
## Giotto does not automatically install all these packages as they are not absolutely required and this reduces the number of dependencies
## Warning in evaluate_spatial_locations(spatial_locs = spatial_locs, cores = cores, : There are non numeric or integer columns for the spatial location input at column position(s): 1
## The first non-numeric column will be considered as a cell ID to test for consistency with the expression matrix
## Other non numeric columns will be removed
## Positions of spatial transctiptomic spots
spatPlot(gobject = gg1, point_alpha = 0.7, save_param = list(save_name = '2_a_spatplot_image'), point_size=2.5)
## Show cell (spatial spot) metadata
pDataDT(gg1)
## cell_ID
## 1: B05_D2__20_25
## 2: B05_D2__16_9
## 3: B05_D2__6_28
## 4: B05_D2__19_4
## 5: B05_D2__17_20
## ---
## 1994: N04_D1__12_26
## 1995: N04_D1__27_18
## 1996: N04_D1__13_27
## 1997: N04_D1__28_21
## 1998: N04_D1__19_30
## Plot the distributions of gene and cell counts in order to determine the optimal filtering parameters
filterDistributions(gg1, detection = 'genes', save_param = list(save_name="1_a_filterDistributions_genes"))
filterDistributions(gg1, detection = 'cells', save_param = list(save_name="1_b_filterDistributions_spots"))
filterCombinations(gg1,
expression_thresholds = c(1),
gene_det_in_min_cells = c(15, 20, 25, 30),
min_det_genes_per_cell = c(300, 300, 300, 300),
save_param = list(save_name="1_c_filterCombinations"))
## $results
## threshold gene_detected_in_min_cells min_detected_genes_per_cell combination
## 1: 1 15 300 15-300
## 2: 1 20 300 20-300
## 3: 1 25 300 25-300
## 4: 1 30 300 30-300
## removed_genes removed_cells
## 1: 4548 0
## 2: 5179 0
## 3: 5632 0
## 4: 5996 0
##
## $ggplot
## Apply filtering
gg1 <- filterGiotto(gobject = gg1,
expression_threshold = 1,
gene_det_in_min_cells = 20,
min_det_genes_per_cell = 300,
expression_values = c('raw'),
verbose = T)
## Number of cells removed: 0 out of 1998
## Number of genes removed: 5179 out of 20249
## Normalization of expression data
gg1 <- normalizeGiotto(gobject = gg1, scalefactor = 6000, verbose = T)
##
## first scale genes and then cells
## Add statistical info to metadata
gg1 <- addStatistics(gobject = gg1)
## Plot the distribution of gene counts per spot
spatPlot2D(gobject = gg1, point_alpha = 0.7, point_size=2.5, show_image=F,
cell_color = 'nr_genes', color_as_factor = F,
save_param = list(save_name = '2_e_nr_genes'))
Highly variable genes are identified and used for dimension reduction using PCA and UMAP/tSNE routine. Spatial networks are then created with nearest neighboring spots, followed by Leiden clustering on the resulting spatial network.
## Identify highly variable genes
gg1 <- calculateHVG(gobject = gg1,
save_param = list(save_name = '3_a_HVGplot'))
## return_plot = TRUE and return_gobject = TRUE
##
## plot will not be returned to object, but can still be saved with save_plot = TRUE or manually
gene_metadata = fDataDT(gg1)
featgenes = gene_metadata[hvg == 'yes' & perc_cells > 3 & mean_expr_det > 0.4]$gene_ID
## Run PCA analysis
gg1 <- runPCA(gobject = gg1,
genes_to_use = featgenes,
scale_unit = F, center = T,
method="factominer")
## a custom vector of genes will be used to subset the matrix
screePlot(gg1, ncp = 30, save_param = list(save_name = '3_b_screeplot'))
## PCA with name: pca already exists and will be used for the screeplot
plotPCA(gobject = gg1,
save_param = list(save_name = '3_c_PCA_reduction'))
## Run UMAP and tSNE on PCA space, and plot the output
gg1 <- runUMAP(gg1, dimensions_to_use = 1:10)
plotUMAP(gobject = gg1,
save_param = list(save_name = '3_d_UMAP_reduction'))
gg1 <- runtSNE(gg1, dimensions_to_use = 1:10)
plotTSNE(gobject = gg1,
save_param = list(save_name = '3_e_tSNE_reduction'))
## Create nearest neighbor network
gg1 <- createNearestNetwork(gobject = gg1, dimensions_to_use = 1:10, k = 15)
## Perform Leiden clustering of spots
gg1 <- doLeidenCluster(gobject = gg1, resolution = 0.15, n_iterations = 1000)
plotUMAP(gobject = gg1,
cell_color = 'leiden_clus', show_NN_network = T, point_size = 2,
save_param = list(save_name = '4_a_UMAP_leiden.r0.15'))
spatPlot(gg1, cell_color = 'leiden_clus', point_size=2.5, show_image=F,
save_param = list(save_name = '4_b_leiden.r0.15'))
## Co-representation of UMAP and spatial positions of spots, plotting Leiden clusters and gene number
spatDimPlot(gobject = gg1, cell_color = 'leiden_clus',
dim_point_size = 2, spat_point_size = 2,
save_param = list(save_name = '5_a_covis_leiden_r0.15', base_width=7,base_height=12))
spatDimPlot(gobject = gg1, cell_color = 'nr_genes', color_as_factor = F,
dim_point_size = 2, spat_point_size = 2,
save_param = list(save_name = '5_b_nr_genes', base_width=7,base_height=12))
## First produce the signature matrix from an existing single cell sequencing data set.
# Read in the single cell matrix
single_cell_DT = fread("/data/Giotto_AD_codes/ST_data/zeisel_sc_data/zeisel_gnxp_norm_matched_gene_PC.txt")
single_cell_matrix = Giotto:::dt_to_matrix(single_cell_DT)
# Read in the single cell annotation vector
z.cells1 = readRDS("/data/Giotto_AD_codes/ST_data/zeisel_sc_data/zeisel_meta_no_outliers_gene.rds")
cell_annotations = as.vector(z.cells1[,"celltypes_main"])
# Create a signature matrix
rank_matrix = makeSignMatrixRank(sc_matrix = single_cell_matrix, sc_cluster_ids = cell_annotations)
# Compute cell type enrichment for each spatial spot
gg1 = runSpatialEnrich(gg1, sign_matrix = rank_matrix, enrich_method = 'rank')
# Define cell type subset of interest and plot the enrichment scores
cell_types_subset = colnames(gg1@spatial_enrichment$rank)[c(2:4,6,7)]
spatCellPlot(gobject = gg1,
spat_enr_names = 'rank',cell_annotation_values = cell_types_subset,
cow_n_col = 2,coord_fix_ratio = NULL, point_size = 1.25,
save_param = list(save_name="7_b_spatcellplot_1_rank"))
## To create individual cell type enrichment plots, add enrichment scores to metadata and plot each cell type
pDataDT(gg1)
## cell_ID nr_genes perc_genes total_expr leiden_clus
## 1: B05_D2__20_25 3164 20.99536 3913.770 1
## 2: B05_D2__16_9 3432 22.77372 3953.507 3
## 3: B05_D2__6_28 4519 29.98673 4192.702 1
## 4: B05_D2__19_4 4346 28.83875 4281.859 1
## 5: B05_D2__17_20 6558 43.51692 4737.481 2
## ---
## 1994: N04_D1__12_26 6995 46.41672 4692.221 4
## 1995: N04_D1__27_18 4886 32.42203 4507.649 2
## 1996: N04_D1__13_27 5670 37.62442 4562.755 4
## 1997: N04_D1__28_21 6193 41.09489 4641.090 2
## 1998: N04_D1__19_30 5073 33.66291 4412.915 2
d.rank = data.frame(gg1@spatial_enrichment$rank)[,cell_types_subset]
gg1 = addCellMetadata(gg1,new_metadata=d.rank)
for (ct in cell_types_subset){
nm = paste0("rank_",ct)
spatPlot2D(gobject = gg1, point_alpha = 1, point_size=2.5, show_image=F,
cell_color = ct, color_as_factor = F,
save_param = list(save_name = nm))
}
## Create and plot spatial network
gg1 <- createSpatialNetwork(gobject = gg1,
method = 'kNN', k = 8,
maximum_distance_knn = 3,
name = 'spatial_network')
showNetworks(gg1)
## The following images are available: spatial_network
## [1] "spatial_network"
spatPlot(gobject = gg1, show_network = T, point_size = 2,
network_color = 'blue', spatial_network_name = 'spatial_network',
save_param = list(save_name = '9_a_knn_network'))
## Identify spatial genes using rank binarization, and plot several top scoring genes
ranktest = binSpect(gg1, bin_method = 'rank',
calc_hub = T, hub_min_int = 5,
spatial_network_name = 'spatial_network')
##
## This is the single parameter version of binSpect
## 1. matrix binarization complete
##
## 2. spatial enrichment test completed
##
## 3. (optional) average expression of high expressing cells calculated
##
## 4. (optional) number of high expressing cells calculated
spatGenePlot(gg1, expression_values = 'scaled',
genes = ranktest$genes[1:6], cow_n_col = 2, point_size = 1.5,
save_param = list(save_name = '10_b_spatial_genes_rank'))
## Identify sets of spatially co-expressed genes
ext_spatial_genes = ranktest[1:300,]$gene
spat_cor_netw_DT = detectSpatialCorGenes(gg1,
method = 'network',
spatial_network_name = 'spatial_network',
subset_genes = ext_spatial_genes)
## Cluster spatially co-expressed genes
spat_cor_netw_DT = clusterSpatialCorGenes(spat_cor_netw_DT, name = 'spat_netw_clus', k = 10)
## Visualize spatial clusters
heatmSpatialCorGenes(gg1,
spatCorObject = spat_cor_netw_DT,
use_clus_name = 'spat_netw_clus',
heatmap_legend_param = list(title = NULL, top_annotation = "Coexpr.clust"),
save_param = list(save_name="10_c_heatmap_v300.10",
base_height = 12, base_width = 13, units = 'cm'))
## Extract and organize clusters of spatially co-expressed genes
table(spat_cor_netw_DT$cor_clusters$spat_netw_clus)
##
## 1 2 3 4 5 6 7 8 9 10
## 36 40 47 25 22 43 23 34 13 17
coexpr_dt = data.table::data.table(genes = names(spat_cor_netw_DT$cor_clusters$spat_netw_clus),
cluster = spat_cor_netw_DT$cor_clusters$spat_netw_clus)
data.table::setorder(coexpr_dt, cluster)
## Read in the list of PIG genes from Chen et al, 2020
pig = toupper(readLines("/data/Giotto_AD_codes/ST_data/PIGgenes_Chen2020.txt"))
## Compute overlaps between PIG genes and spatial gene clusters
pig.overlaps = data.frame()
for (i in min(coexpr_dt[,"cluster"]):max(coexpr_dt[,"cluster"])){
pig.overlaps[i,"clust.size"] = coexpr_dt[cluster==i,length(genes)]
pig.overlaps[i,"overlap"] = length(intersect(pig,coexpr_dt[cluster==i,][["genes"]]))
}
pig.overlaps
## clust.size overlap
## 1 36 0
## 2 40 0
## 3 47 0
## 4 25 0
## 5 22 0
## 6 43 20
## 7 23 0
## 8 34 0
## 9 13 0
## 10 17 0
## Add the mean scaled expression of genes from a relevant cluster to metadata, and plot it
sig6 = coexpr_dt[cluster==6,genes]
spat.clust.6.mean = apply(gg1@norm_scaled_expr[sig6,],2,mean)
gg1 = addCellMetadata(gg1,new_metadata=spat.clust.6.mean)
spatPlot2D(gobject = gg1, point_alpha = 0.7, point_size=2.5, show_image=F,
cell_color = 'spat.clust.6.mean', color_as_factor = F,
save_param = list(save_name = '11_sp.clust.6.mean'))